#! /usr/bin/env python3
#
# Run semgrep-core on a series of pairs (target, pattern) and report the
# time it takes. Optionally upload the results to the semgrep dashboard.
import argparse
import json
import os
import subprocess
import time
from typing import List


# Path to the core executable that gets benchmarked. It is a relative path,
# so it is resolved against the working directory: the script must be run from
# a directory where this path is valid (presumably the directory containing
# this script -- confirm).
SEMGREP_CORE = "../../bin/opengrep-core"


class Test:
    """Description of a single benchmark case.

    Attributes:
        name: Test name. Pattern file name is NAME.sgrep.
        lang: Language ID.
        target: Target file name.
        is_config: Whether the file is a config.
    """

    def __init__(self, name: str, lang: str, target: str, is_config: bool = False):
        # What to run ...
        self.name = name
        self.is_config = is_config
        # ... and what to run it on.
        self.lang = lang
        self.target = target


# The benchmark cases, run in this order. Each entry names a pattern or rule
# file, the language to analyze, and the target file to scan.
TESTS = [
    # Control. No other test can be faster than this one.
    Test("empty", "js", "empty.js"),
    #
    # Match on a simple statement, no ellipsis. Serves as the baseline
    # for the other tests on the same file.
    Test("stmts-simple", "js", "l10000.js"),
    # Single ellipsis without backreference: $A; ... foo($B);
    Test("stmts-ellipsis-ab", "js", "l10000.js"),
    # Single ellipsis on statements, with backreference to statement
    # that matches in many places: $A; ... foo($A);
    Test("stmts-ellipsis-aa", "js", "l10000.js"),
    # Double ellipsis on statements without backref: $A; ... $B; ... foo($C);
    Test("stmts-double-ellipsis-abc", "js", "l1000.js"),
    # Double ellipsis on statements with backref: $A; ... $B; ... foo($B);
    Test("stmts-double-ellipsis-abb", "js", "l1000.js"),
    # Double ellipsis on statements with backref: $A; ... $A; ... foo($B);
    Test("stmts-double-ellipsis-aab", "js", "l1000.js"),
    # Double ellipsis on statements with backref: $A; ... $B; ... foo($A);
    Test("stmts-double-ellipsis-aba", "js", "l1000.js"),
    # Double ellipsis on statements with backrefs: $A; ... $A; ... foo($A);
    Test("stmts-double-ellipsis-aaa", "js", "l1000.js"),
    # Svalue analysis of a large function
    Test("svalue", "csharp", "l5000.cs"),
    # Stress test for regexp matching
    Test("semgrep_targets", "regex", "semgrep_targets.txt", True),
    #
    # Real-world examples (a NAME.yaml rule file is used when the last
    # argument is True, a NAME.sgrep pattern file otherwise)
    #
    # Metavariable as a key in a class definition with ellipsis
    Test("anyorder-metavar-key", "java", "l5000.java"),
    # Two pattern-not-insides with ellipsis
    Test("stmts-ellipsis-inside", "js", "l1000.js", True),
    # Multiple pattern-nots with multiple pattern-eithers
    Test("metavariable-regexp-dict", "generic", "l5000.json", True),
    Test("metavariable-regexp-dict2", "json", "l5000.json", True),
    #
    # TODO, not yet optimized
    #
    # Multiple pattern-nots with multiple pattern-eithers
    # NOTE(review): this description is identical to the one above
    # metavariable-regexp-dict and may be a copy-paste leftover -- confirm
    # that it describes range-stmts.
    Test("range-stmts", "python", "l300.py", True),
]


class Result:
    """Outcome of one benchmark run.

    Attributes:
        name: Name of the test that was run.
        duration: Measured duration of the run, as a float.
    """

    def __init__(self, name: str, duration: float):
        self.duration = duration
        self.name = name


# RIP semgrep-core -f
# This creates a semgrep rule from a semgrep pattern found in a file.
def make_rule_file(dirname: str, name: str, lang: str) -> str:
    """Wrap the pattern stored in DIRNAME/NAME.sgrep into a rule file.

    The whole content of the pattern file is embedded as the `pattern` of a
    single rule with id `test`, and the rule is written to
    DIRNAME/NAME.tmp.yaml, overwriting any existing file.

    Args:
        dirname: Directory containing the pattern file; the rule file is
            created in the same directory.
        name: Base name shared by the pattern file and the rule file.
        lang: Language ID placed in the rule's `languages` list.

    Returns:
        The path of the generated rule file.

    Raises:
        OSError: If the pattern file cannot be read or the rule file cannot
            be written.
    """
    pattern_file = os.path.join(dirname, name + ".sgrep")
    rule_file = os.path.join(dirname, f"{name}.tmp.yaml")
    # Use an explicit encoding so the result does not depend on the locale
    # of the machine running the benchmark.
    with open(pattern_file, "r", encoding="utf-8") as file:
        pattern = file.read()
    with open(rule_file, "w", encoding="utf-8") as file:
        # json.dumps produces a double-quoted, escaped string, which keeps
        # multi-line patterns and special characters on a single line.
        file.write(
            f"""
# generated from {pattern_file}
rules:
- id: test
  pattern: {json.dumps(pattern)}
  languages: [{lang}]
  message: found something
  severity: ERROR
"""
        )
    return rule_file


def run_tests(
    dirname: str, tests: List[Test], no_cache: bool, max_cache: bool
) -> List[Result]:
    """Run each test once and measure how long the core executable takes.

    For every test, the rule file is either DIRNAME/NAME.yaml (config tests)
    or generated from DIRNAME/NAME.sgrep. If a gzipped copy of the target
    exists (TARGET.gz), it is decompressed first, keeping the archive and
    overwriting any existing decompressed file. The duration of each run is
    printed as it completes.

    Args:
        dirname: Directory holding the pattern, rule and target files.
        tests: Tests to run, in order.
        no_cache: Pass `-no_opt_cache` to the executable.
        max_cache: Pass `-opt_max_cache` to the executable. Ignored when
            `no_cache` is set.

    Returns:
        One Result per test, in the order the tests were run.

    Raises:
        subprocess.CalledProcessError: If `gunzip` or the benchmarked
            executable exits with a non-zero status.
    """
    results = []
    for test in tests:
        if test.is_config:
            rule_file = os.path.join(dirname, test.name + ".yaml")
        else:
            rule_file = make_rule_file(dirname, test.name, test.lang)

        target_file = os.path.join(dirname, test.target)
        target_file_gz = os.path.join(dirname, test.target + ".gz")

        if os.path.isfile(target_file_gz):
            subprocess.check_call(["gunzip", "-k", "-f", target_file_gz])

        args = [
            SEMGREP_CORE,
            "-lang",
            test.lang,
            "-rules",
            rule_file,
            target_file,
        ]
        if no_cache:
            args.append("-no_opt_cache")
        elif max_cache:
            args.append("-opt_max_cache")

        # Time only the subprocess itself, using a monotonic high-resolution
        # clock: time.time() follows the system clock, which can be adjusted
        # while the benchmark is running.
        start = time.perf_counter()
        p = subprocess.run(args)  # nosem
        duration = time.perf_counter() - start

        # A failed run is an error, not a measurement.
        p.check_returncode()
        print(f"{test.name}: {duration:.6f} s")
        results.append(Result(test.name, duration))
    return results


def main() -> None:
    """Parse the command line, then list the tests or run them.

    With `--list`, print the name of every available test and return.
    Otherwise run either the single test called NAME or, when no NAME is
    given, all tests, using the files in the `input` directory.

    An unknown NAME is reported as a command-line error (exit status 2)
    rather than silently running no test at all.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "name", metavar="NAME", type=str, nargs="?", help="run just this test"
    )
    parser.add_argument(
        "--list", help="list available tests and exit", action="store_true"
    )
    parser.add_argument(
        "--no-cache", help="disable caching during matching", action="store_true"
    )
    parser.add_argument(
        "--max-cache", help="use most aggressive caching", action="store_true"
    )
    args = parser.parse_args()

    if args.list:
        for test in TESTS:
            print(test.name)
        return

    if args.name:
        tests = [test for test in TESTS if test.name == args.name]
        if not tests:
            # Without this check a typo in NAME would look like a
            # successful (and suspiciously fast) benchmark run.
            parser.error(
                f"unknown test name: {args.name!r} (use --list to see available tests)"
            )
    else:
        tests = TESTS
    run_tests("input", tests, args.no_cache, args.max_cache)


# Run the benchmarks only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
